Code
library(tidyverse)library(tidyverse)Read in the full project structure from the project file to map experiment names to numbers.
proj <- jsonlite::read_json("data/project_1136_structure.json")
exp_data <- purrr::map_df(proj, \(comp) {
if (comp$component_type == "exp") {
data.frame(
exp_id = comp$id,
name = comp$name,
res_name = comp$res_name,
instructions = comp$instructions,
question = comp$question,
exptype = comp$exptype,
trial_order = comp$trial_order,
total_stim = comp$total_stim,
random_stim = comp$random_stim,
trials = length(comp$trial),
stim = length(comp$stim)
)
} else {
NULL
}
}) |>
mutate(exp = sub("ManyFaces? Pilot Ratings: ", "", res_name) |> trimws())
trial_data <- purrr::map_df(proj, \(comp) {
if (comp$component_type == "exp") {
purrr::map_df(comp$trial, \(trial) {
data.frame(
exp_id = comp$id,
n = trial$trial_n,
name = trial$name,
img_id = trial$center_img,
img_path = comp$stimuli[[as.character(trial$center_img)]]
)
})
} else {
NULL
}
}) |>
mutate(name = sub("^(manyfaces|attention_checks)/", "", name))exp_data |>
select(exp_id, exp, question, trials) |>
arrange(exp)This workflow requires the data-raw directory, which is not shared on github.
This is the SQL for downloading the data from Experimentum. We need to download in chunks of 50000 rows to avoid file download limits on the site (not needed if downloading directly from SQL).
SELECT
session.id as session_id, project_id, exp.res_name as exp_name, exp_id,
session.user_id, user.sex as user_sex, user.status as user_status,
ROUND(DATEDIFF(ed.dt, REPLACE(birthday, "-00","-01"))/365.25, 1) AS user_age,
trial.name as trial_name,
trial_n,
`order`,
dv,
rt,
ed.side,
ed.dt
FROM session
LEFT JOIN user USING (user_id)
LEFT JOIN exp_data AS ed ON ed.session_id = session.id
LEFT JOIN exp ON exp.id = ed.exp_id
LEFT JOIN trial USING (exp_id, trial_n)
WHERE session.project_id = 1136
AND user.status IN ("guest", "registered")
AND exp_id IN (1384, 1400, 1399, 1398, 1401, 1402, 1403,
1404, 1405, 1397, 1390, 1389, 1388, 1387,
1386, 1385, 1382, 1381, 1380, 1379, 1377)
LIMIT 50000
OFFSET 0# combine multiple downloads into one file
exp_raw <- list.files("data-raw/exp", full.names = TRUE) |>
read_csv(show_col_types = FALSE) |>
unique() |>
filter(user_status %in% c("guest", "registered"))
write_csv(exp_raw, paste0("data-raw/ManyFaces-Pilot-Ratings-exps_", Sys.Date(), ".csv"))
# get most recent files
exp_file <- list.files("data-raw", "ManyFaces-Pilot-Ratings-exps",
full.names = TRUE) |>
sort(decreasing = TRUE) |>
pluck(1)
exp_raw <- read_csv(exp_file, show_col_types = FALSE) |>
filter(user_status %in% c("guest", "registered")) |>
unique()
# get most recent files
quest_file <- list.files("data-raw", "ManyFaces-Pilot-Ratings-quests",
full.names = TRUE) |>
sort(decreasing = TRUE) |>
pluck(1)
quest_raw <- read_csv(quest_file, show_col_types = FALSE) |>
filter(user_status %in% c("guest", "registered")) |>
unique()
# write to the data directory
write_csv(exp_raw, "data/manyfaces-pilot-exp.csv")
write_csv(quest_raw, "data/manyfaces-pilot-quest.csv")exp_raw <- read_csv("data/manyfaces-pilot-exp.csv", show_col_types = FALSE)
quest_raw <- read_csv("data/manyfaces-pilot-quest.csv", show_col_types = FALSE)ed <- exp_data |> select(exp_id, exp)
exp_long <- exp_raw |>
select(session_id, exp_id, trial_name, dv, rt, dt) |>
unique() |>
mutate(trial_name = sub("^(manyfaces|attention_checks)/", "", trial_name)) |>
left_join(ed, by = "exp_id")Investigate distribution of RTs
ggplot(exp_long, aes(x = rt)) +
geom_histogram(bins = 100) +
scale_x_log10(breaks = 10^(1:6),
labels = c("10ms", "100ms", "1s", "10s", "100s", "1000s"))And median RTs for peole who did at least 100 trials.
med_rt_100 <- exp_long |>
summarise(med_rt = median(rt),
n = n(),
.by = session_id) |>
filter(n > 100)
ggplot(med_rt_100, aes(x = med_rt)) +
geom_histogram(bins = 100) +
scale_x_continuous(breaks = seq(0, 10000, 500)) +
labs(x = "Median Reaction Time (ms)")Set RT median cutoff at 1% quantile?
rt_cutoff <- quantile(med_rt_100$med_rt, probs = 0.01)# Calculate median RT and number of trials completed
ed <- select(exp_data, exp_id, trials)
rt <- exp_long |>
summarise(start = min(dt),
median_rt = median(rt),
mfvp = (table(dv) |> sort() |> tail(1) |> as.vector())/n(),
n = n(),
.by = c(session_id, exp_id)) |>
left_join(ed, by = "exp_id")# Check the attention checks
checks <- exp_long |>
select(session_id:dv) |>
filter(grepl("check", trial_name)) |>
mutate(check_type = sub("check_[a-z0-9-]+_", "", trial_name),
check_type = ifelse(exp_id == 1400, substr(check_type, 4, 6), check_type)) |>
summarise(checks_passed = mean(check_type == dv),
.by = c("session_id", "exp_id"))# Check try question
try <- quest_raw |>
filter(q_name == "try") |>
select(session_id, try = dv) |>
unique()# combine to determine who gets excluded
start_date <- "2025-05-06"
to_exclude <- rt |>
left_join(checks, by = c("session_id", "exp_id")) |>
left_join(try, by = "session_id") |>
filter(
n != trials |
interval(start, start_date) |> as.numeric("days") > 0 |
is.na(checks_passed) |
checks_passed < 5/7 |
mfvp > 0.9 |
median_rt < rt_cutoff |
try != 2
) to_exclude |>
mutate(reason = case_when(
n < trials ~ "did not finish trials",
n > trials ~ "too many trials",
interval(start, start_date) |> as.numeric("days") > 0 ~ "date",
checks_passed < 5/7 ~ "passed < 5/7 checks",
mfvp > .9 ~ ">90% same response",
median_rt < rt_cutoff ~ paste0("median RT < ", round(rt_cutoff)),
try == 1 ~ "did not try",
.default = "other")) |>
count(reason, sort = TRUE)Exclude 234 people and remove attention checks
exp <- anti_join(exp_long, to_exclude, by = c("session_id", "exp_id")) |>
filter(!grepl("check_", trial_name))endtimes <- quest_raw |>
summarise(end = max(endtime), .by = c(session_id))
times <- exp |>
summarise(start = min(dt), .by = c(session_id)) |>
left_join(endtimes, by = "session_id") |>
mutate(duration = interval(start, end) |> as.numeric("minutes"))Number of remaining participants per study
There are 1922 included participants.
exp |>
summarise(.by = c(exp, session_id)) |>
count(exp)quest <- anti_join(quest_raw, to_exclude, by = c("session_id")) |>
select(session_id, q_name, dv, endtime) |>
unique() |>
pivot_wider(names_from = q_name, values_from = dv) |>
mutate(age = as.integer(age))ggplot(quest, aes(x = age, fill = gender)) +
geom_histogram(binwidth = 1) +
scale_fill_manual(values = c("hotpink", "lightblue", "orchid"))count(quest, residence, sort = TRUE)quest |>
mutate(ethnicity = tolower(ethnicity)) |>
count(ethnicity, sort = TRUE)count(quest, device, sort = TRUE)rainbow <- c("firebrick", "darkorange", "goldenrod", "darkgreen", "dodgerblue3", "darkorchid4")exp_levels <- c("attractive", "trustworthy", "dominant",
"memorable", "gender-typical")
exp |>
filter(exp_id %in% 1377:1382) |>
mutate(dv = as.integer(dv),
exp = factor(exp, exp_levels)) |>
ggplot(aes(x = dv, fill = exp)) +
geom_histogram(binwidth = 1, show.legend = FALSE, color = "black") +
facet_wrap(~exp, ncol = 3, axes = "all_x", drop = FALSE) +
labs(title = "Standardised Neutral Ratings",
x = "") +
scale_x_continuous(breaks = 1:7) +
scale_fill_manual(values = rainbow, drop = FALSE)# function to create heatmap visualisations
heatmap <- function(id, label) {
exp |>
filter(exp %in% id) |>
separate(trial_name, c("lab", "id"), extra = "drop") |>
count(lab, id, dv) |>
ggplot(aes(x = dv, y = id, fill = n)) +
geom_tile() +
facet_wrap(~lab) +
scale_fill_viridis_c() +
labs(x = label, y = NULL,
title = paste(label, "Ratings")) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90))
}heatmap("attractive", "Attractiveness")heatmap("trustworthy", "Trustworthiness")heatmap("dominant", "Dominance")heatmap("memorable", "Memorableness")heatmap("gender-typical", "Gender Typicality")exp_labels <- c("attractive", "trustworthy", "dominant")
exp_levels <- paste(exp_labels, "(unstd)")
exp |>
filter(exp_id %in% 1397:1399) |>
mutate(dv = as.integer(dv),
exp = factor(exp, exp_levels, exp_labels)) |>
ggplot(aes(x = dv, fill = exp)) +
geom_histogram(binwidth = 1, show.legend = FALSE, color = "black") +
facet_wrap(~exp, ncol = 5, drop = FALSE) +
labs(title = "Unstandardised Neutral Ratings",
x = "") +
scale_x_continuous(breaks = 1:7) +
scale_fill_manual(values = rainbow, drop = FALSE)heatmap("attractive (unstd)", "Attractiveness (Unstandardised)")heatmap("trustworthy (unstd)", "Trustworthiness (Unstandardised)")heatmap("dominant (unstd)", "Dominance (Unstandardised)")dv_levels <- c("anger", "disgust", "fear",
"happiness", "sadness", "surprise", "other")
emo_levels <- c("ang", "dis", "fea", "hap", "sad", "sur")
emo_labels <- paste(dv_levels[1:6], "faces")
exp |>
filter(exp_id %in% c(1384, 1401:1405)) |>
separate(trial_name, c("lab", "model", "type", "emo", "view")) |>
mutate(dv = factor(dv, dv_levels),
emo = factor(emo, emo_levels, emo_labels)) |>
ggplot(aes(x = dv, fill = dv)) +
geom_point(aes(x = x, colour = I(fill), fill = I(fill)),
data.frame(emo = factor(emo_levels, emo_levels, emo_labels),
x = 1:6,
fill = rainbow),
size = 6.5, y = -60, shape = 18, show.legend = FALSE) +
geom_bar(color = "transparent") +
facet_wrap(~emo, axes = "all_x", drop = FALSE) +
scale_x_discrete(labels = c("A", "D", "F", "H", "S", "U", "O")) +
scale_fill_manual(values = c(rainbow, "grey"), drop = FALSE) +
labs(title = "Emotion Ratings",
x = "",
fill = "Rated Emotion") +
coord_cartesian(clip="off") +
theme(axis.ticks.x = element_blank())exp_levels <- c("anger", "disgust", "fear",
"happiness", "sadness", "surprise")
exp |>
filter(exp_id %in% 1385:1390) |>
mutate(dv = as.integer(dv),
exp = factor(exp, exp_levels)) |>
ggplot(aes(x = dv, fill = exp)) +
geom_histogram(binwidth = 1, show.legend = FALSE, color = "black") +
facet_wrap(~exp, ncol = 3, axes = "all_x", drop = FALSE) +
labs(title = "Emotion Intensity Ratings",
x = "") +
scale_fill_manual(values = rainbow, drop = FALSE) +
scale_x_continuous(breaks = 1:7)dv_levels <- seq(20, 85, 5)
dv_labels <- paste(dv_levels-4, "-", dv_levels )
dv_labels[14] <- "81+"
exp |>
filter(exp_id %in% 1400) |>
mutate(dv = factor(dv, dv_levels, dv_labels)) |>
ggplot(aes(x = dv)) +
geom_bar(color = "black", fill = "white") +
scale_x_discrete(drop = FALSE) +
labs(title = "Age Ratings",
x = "")# exp |>
# filter(exp_id %in% 1400) |>
# mutate(dv = as.numeric(dv) - 2.5) |>
# summarise(age = mean(dv), age_sd = sd(dv), .by = trial_name)exp |>
filter(exp_id %in% 1400) |>
mutate(dv = factor(dv, dv_levels, dv_labels)) |>
mutate(trial_name = gsub("_std_neu_0", "", trial_name)) |>
separate(trial_name, c("lab", "id")) |>
count(lab, id, dv) |>
ggplot(aes(x = dv, y = id, fill = n)) +
geom_tile() +
facet_wrap(~lab) +
scale_fill_viridis_c() +
labs(x = "Age", y = NULL) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90))